hysop.backend.device.codegen.kernels.transpose module

class hysop.backend.device.codegen.kernels.transpose.TransposeKernelGenerator(typegen, ctype, vectorization, axes, tile_size, tile_padding, symbolic_mode, use_diagonal_coordinates=True, is_inplace=False, known_vars=None, debug_mode=False, tuning_mode=False, **kargs)[source]

Bases: KernelCodeGenerator

build_requirements()[source]
cache_alloc_bytes(local_size)[source]
classmethod characterize_permutation(shape, axes, max_device_workdim)[source]
static codegen_name(is_inplace, axes, ctype, tile_size, tile_padding, vectorization, use_diagonal_coordinates)[source]
classmethod compute_global_size(shape, tile_size, vectorization, axes, local_work_size, work_load)[source]
gen_kernel_arguments(typegen, ctype, Pdim, debug_mode, is_inplace, known_vars, symbolic_mode)[source]
gencode()[source]
classmethod max_local_worksize(shape, work_dim, tile_size, vectorization, axes)[source]
n_dbg_arrays = 2
required_cache_size()[source]
required_workgroup_cache_size()[source]

Return a tuple of the required (static, dynamic, total) cache bytes per workgroup.